
# required packages
require(dplyr)
require(tidyr)
require(stringr)
require(ggplot2)
require(betareg)
require(readr)
require(lubridate)
require(brms)
require(rstanarm)
require(forcats)
require(bayesplot)
require(inflection)
require(mirt)
require(texreg)
require(patchwork)
require(bayestestR)
require(modelsummary)
library(kableExtra)
library(posterior)
library(tidybayes)

# set working directory to source file location

#setwd(dirname(rstudioapi::getActiveDocumentContext()$path))


# Prepare data ------------------------------------------------------------

# auxiliary code

source("define_ord_betareg.R")

source("prepare_data.R")

# set this to use multicore options
options(brms.backend="cmdstanr")

# whether or not to run all models (will take a few hours), or use saved model objects from disk

run_model <- T

# look at some correlates of political connections

# let's make some factor scores for political connections
# need to recode these variables

all_types <- unique(unlist(str_split(unique(qual_data$pol_connect1_1),pattern=",")))
all_types <- all_types[!is.na(all_types)]

over_types <- lapply(all_types, function(a) {
  out_tib <- tibble(!!a :=rowSums(cbind(
          as.numeric(grepl(x=qual_data$pol_connect1_1,pattern=a)),
          as.numeric(grepl(x=qual_data$pol_connect1_2,pattern=a)),
          as.numeric(grepl(x=qual_data$pol_connect1_3,pattern=a)),
          as.numeric(grepl(x=qual_data$pol_connect1_4,pattern=a)))))
}) %>% bind_cols

names(over_types) <- c("cur_high_beau",
                       "for_low_beau",
                       "for_high_beau",
                       "for_mem_parl",
                       "cur_low_beau",
                       "cur_mem_parl")

# simple 1-D IRT model

pol_score <- mirt(over_types,
                  model=1,
                  itemtype="Rasch")

# put pol score back in the data frame

qual_data$pol_score <- as.numeric(fscores(pol_score))

# table of pol scores by performance

str_wrap_factor <- function(x, ...) {
  levels(x) <- str_wrap(levels(x), ...)
  x
}

qual_data <- mutate(qual_data,perf_1=factor(perf_1, levels=c("More than 20% loss",
                                                             "Between 10 and 20% loss",
                                                             "Between 10 and 5% loss",
                                                             "Between 5 and 0% loss",
                                                             "Broke even",
                                                             "Between 0 and 5% profit margin",
                                                             "Between 5 and 10% profit margin",
                                                             "Between 10 and 20% profit margin",
                                                             "More than 20% profit margin")))


qual_data %>% 
  filter(!is.na(perf_1)) %>% 
  ggplot(aes(y=pol_score,x=str_wrap_factor(perf_1,5))) +
  stat_summary(fun.data=mean_cl_boot) +
  theme(panel.grid = element_blank(),
        panel.background = element_blank(),
        axis.text.x = element_text(size=7)) +
  ylab("Political Connections Score")

ggsave("pol_con_perf.png")

# let's do a heat map of pol connections 1-10 and pol efficiency 1-10

# make a quantitative version of firm performance

qual_data <- mutate(qual_data,perf_quant=as.numeric(as.character(factor(perf_1,
                                                                        labels=c(-30,
                                                                                 -15,
                                                                                 -7.5,
                                                                                 -2.5,
                                                                                 0,
                                                                                 2.5,
                                                                                 7.5,
                                                                                 15,
                                                                                 30)))))

qual_data %>% 
  mutate(pol_con_1=as.numeric(pol_con_1),
         pol_eff_1=as.numeric(pol_eff_1)) %>% 
  filter(!is.na(pol_con_1),!is.na(pol_eff_1)) %>% 
  group_by(pol_con_1,pol_eff_1) %>% 
  mutate(count_pol=log(n(),base = 1.01)) %>% 
  ggplot(aes(y=pol_con_1,x=pol_eff_1)) +
  geom_raster(aes(fill=count_pol)) +
  theme(panel.background = element_blank()) +
  xlab("Political Connections Level") +
  ylab("Political Efficacy Level") +
  guides(size=guide_legend(title="N")) +
  scale_fill_viridis_c(trans="log2",name="Count")

  ggsave("pol_con_eff.png")

# figure out categories of companies

qual_data <- mutate(qual_data,
                    pol_con_1=as.numeric(pol_con_1),
                    pol_eff_1=as.numeric(pol_eff_1),
                    firm_pol_cat=case_when(pol_con_1<6 & pol_eff_1<6~"Non-Connected/Hurt",
                                           pol_con_1>5 & pol_eff_1<6~"Connected/Hurt",
                                           pol_con_1<6 & pol_eff_1>5~"Non-Connected/Helped",
                                           pol_con_1>5 & pol_eff_1>5~"Connected/Helped"))

# see how performance breaks down by these categories

prop.table(table(qual_data$perf_1,qual_data$firm_pol_cat),margin=2)

qual_data %>% 
  group_by(perf_1,firm_pol_cat) %>% 
  count %>% 
  filter(!is.na(firm_pol_cat),!is.na(perf_1)) %>% 
  group_by(firm_pol_cat) %>% 
  mutate(n=n/sum(n)) %>% 
  ggplot(aes(x=perf_1,y=n)) +
  geom_col() +
  facet_wrap(~firm_pol_cat,ncol=1) +
  scale_y_continuous(labels=scales::percent,breaks=c(0,0.1,0.2)) +
  theme(panel.grid = element_blank(),
        panel.background = element_blank(),
        strip.background = element_blank(),
        axis.text.x = element_text(size=7,angle=90)) +
  xlab("")  + ylab("")

ggsave("con_perf.png")

# datasets:

# qual_data - original survey data
# combined_clean - all data excluding duplicates, fast responses
# combined_clean_clicks - data with information on number of clicks on the sliders (much more recent)

combined_clean$combined_perf2 <- as.character(combined_clean$combined_perf) %>% 
  factor(levels=levels(combined_clean$combined_perf))

# produce some descriptive statistics about Venezuela versus the rest

library(ggthemes)
library(patchwork)

p1 <- qual_data %>% 
  mutate(country=recode(country,
                        `Venezuela, Bolivarian Republic of...`="Venezuela")) %>% 
  ggplot(aes(y=pol_con_1,
             x=country)) +
  stat_summary(fun.data="mean_cl_boot") +
  theme_tufte(base_family = "") +
  ggtitle("Political Connections (0 to 10)") +
  labs(x="",y="")

p2 <- qual_data %>% 
  filter(!is.na(bribe_income)) %>% 
  mutate(country=recode(country,
                        `Venezuela, Bolivarian Republic of...`="Venezuela"),
         bribe_income=ordered(bribe_income,
                              levels=c("0%",
                                       "Less than 1%",
                                       "From 2% to less than 5%",
                                       "From 5% to less than 10%",
                                       "From 10% to less than 20%",
                                       "From 20% to less than 30%",
                                       "Over 30%")),
         bribe_income=fct_recode(bribe_income,
           "10%-20%" = "From 10% to less than 20%",
           "2%-5%" = "From 2% to less than 5%",
           "20%-30%" = "From 20% to less than 30%",
           "5%-10%" = "From 5% to less than 10%",
           "<1%" = "Less than 1%",
           ">30%" = "Over 30%")) %>%
  group_by(country) %>% count(bribe_income) %>% 
  mutate(n=n/sum(n)) %>% 
  ggplot(aes(x=bribe_income)) +
  geom_col(aes(fill=country,y=n),
           position=position_dodge(.5)) +
  scale_fill_viridis_d(name="") +
  scale_x_discrete(guide = guide_axis(n.dodge = 2))+
  scale_y_continuous(labels=scales::percent_format()) +
  theme_tufte(base_family = "") +
  ggtitle("Income Paid in Bribes") +
  labs(x="",y="")

p3 <- qual_data %>% 
  filter(!is.na(bribe_increase)) %>% 
  mutate(country=recode(country,
                        `Venezuela, Bolivarian Republic of...`="Venezuela"),
         bribe_increase=ordered(bribe_increase,
                              levels=c("Large decrease",
                                       "Decrease",
                                       "No change",
                                       "Increase",
                                       "Large Increase"))) %>%
  group_by(country) %>% count(bribe_increase) %>% 
  mutate(n=n/sum(n)) %>% 
  ggplot(aes(x=bribe_increase)) +
  geom_col(aes(fill=country,y=n),
           position=position_dodge(.5)) +
  scale_fill_viridis_d(name="") +
  scale_x_discrete(guide = guide_axis(n.dodge = 2))+
  scale_y_continuous(labels=scales::percent_format()) +
  theme_tufte(base_family = "") +
  ggtitle("Increase in Bribes Past Five Years") +
  labs(x="",y="")

p4 <- qual_data %>% 
  filter(!is.na(pol_sit)) %>% 
  mutate(country=recode(country,
                        `Venezuela, Bolivarian Republic of...`="Venezuela"),
         pol_sit=ordered(pol_sit,
                                levels=c("Declined",
                                         "Stayed the same",
                                         "Improved"))) %>%
  group_by(country) %>% count(pol_sit) %>% 
  mutate(n=n/sum(n)) %>% 
  ggplot(aes(x=pol_sit)) +
  geom_col(aes(fill=country,y=n),
           position=position_dodge(.5)) +
  scale_fill_viridis_d(name="") +
  scale_y_continuous(labels=scales::percent_format()) +
  scale_x_discrete(guide = guide_axis(n.dodge = 2))+
  theme_tufte(base_family = "") +
  ggtitle("Political Situation in Country") +
  labs(x="",y="")


(p1 + p2) / (p3 + p4) + plot_annotation(caption="Estimates are sample averages.",tag_levels = "A") +
  plot_layout(guides="collect") &
  theme(title = element_text(size=9),
        legend.position = "bottom")

ggsave("venezuela_diff.pdf",width=5,height=3,scale=1.5)

# fit basic OLS model

lm_fit <- lm(outcome_norm ~ connections + employees + years + own +
               country + sector + assets + combined_perf2,
               data=combined_clean)

coef_map <- c("Daughter of President" = "connectionsboard_daughter_pres",
              "Mid-Level Official on Board" = "connectionsboard_med_crat",
              "Minister on Board" = "connectionsboard_minister",
              "MP on Board" = "connectionsboard_mp",
              "Son of President" = "connectionsboard_son_pres",
              "Army General" = "connectionsown_gen_army",
              "MP Owner" = "connectionsown_mp",
              "PM Nephew Owner" = "connectionsown_pm_nephew",
              "PM Niece Owner" = "connectionsown_pm_niece",
              "Police Owner" = "connectionsown_police",
              "President Classmate Owner" = "connectionsown_pres_classmate",
              "President Daughter In-law" = "connectionsown_pres_daughter_inlaw",
              "Member of President Party" = "connectionsown_pres_party",
              "President Son In-law" = "connectionsown_pres_son_inlaw",
              "Very Profitable"="combined_perf2gain_much",
              "Somewhat Profitable"="combined_perf2gain_some",
              "Break Even"="combined_perf2break_even",
              "Lose Some"="combined_perf2lose_some",
              "Assets" = "assets",
              "Connected" = "connectedtreatment",
              "China" = "countryChina",
              "Egypt" = "countryEgypt",
              "Germany" = "countryGermany",
              "Japan" = "countryJapan",
              "Korea" = "countryKorea",
              "Russia" = "countryRussia",
              "Saudi" = "countrySaudi",
              "Ukraine" = "countryUkraine",
              "USA" = "countryUSA",
              "Venezuela" = "countryVenezuela",
              "Employees" = "employees",
              "Intercept" = "Intercept",
              "Private" = "ownprivate",
              "SOE" = "ownsoe",
              "Construction" = "sectorconstruction",
              "Energy" = "sectorenergy",
              "Financials" = "sectorfinancials",
              "Manufacturing" = "sectormanufacturing",
              "Retail" = "sectorretail",
              "Telecom" = "sectortelecom",
              "Years" = "years")

coef_map2 <- names(coef_map)
names(coef_map2) <- coef_map

options(modelsummary_factory_latex = 'kableExtra')

modelsummary(lm_fit,
                          coef_map=coef_map2,
                          estimate="{estimate} ({std.error}){stars}",
             statistic=NULL,
                          output="lm_mod.tex",
                          notes="Estimates are from OLS regression with standard errors in parentheses.",
                          title="OLS Estimates of Conjoint Experiment Treatments") %>% 
  kable_styling(font_size=8) %>% 
  save_kable("lm_mod.tex")

lm_fit <- lm(outcome_norm ~ gender*perf + employees + years + own,
             data = combined_clean)

summary(lm_fit)

# ordered beta regression

# scale variables 

# base treatment

# run main models ---------------------------------------------------------


if(run_model) {
  base_con <- brm(outcome_norm ~ 0 + Intercept + connected + 
                    employees + years + own + country + sector + assets + combined_perf2,
                  family=ord_beta_reg,
                  data=combined_clean,
                  stanvars=stanvars,
                  prior=priors,
                  iter=1000,backend="cmdstanr",
                  chains=1,cores=8)
  
  saveRDS(base_con,"data/base.rds")
  
  # managers only
  
  base_con_man <- brm(outcome_norm ~ 0 + Intercept + connected + 
                    employees + years + own + country + sector + assets + combined_perf2,
                  family=ord_beta_reg,
                  data=filter(combined_clean,position=="Manager"),
                  stanvars=stanvars,
                  prior=priors,
                  iter=1000,backend="cmdstanr",
                  chains=1,cores=8)
  
  saveRDS(base_con_man,"data/base_man.rds")
  
  # exclude uncommon sectors
  
  base_con_lim_sector <- brm(outcome_norm ~ 0 + Intercept + connected + 
                        employees + years + own + country + sector + assets + combined_perf2,
                      family=ord_beta_reg,
                      data=filter(combined_clean,
                                  !(sector_1 %in% c("HUMAN HEALTH AND SOCIAL WORK ACTIVITIES",
                                                    "ACTIVITIES OF EXTRATERRITORIAL ORGANISATIONS AND BODIES",
                                                    "ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS- AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN USE",
                                                    "PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY",
                                                    "PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES"))),
                      stanvars=stanvars,
                      prior=priors,
                      iter=1000,backend="cmdstanr",
                      chains=1,cores=8)
  
  saveRDS(base_con_lim_sector,"data/base_con_lim_sector.rds")
  
} else {
  base_con <- readRDS("data/base.rds")
  base_con_man <- readRDS("data/base_man.rds")
  base_con_lim_sector <- readRDS("data/base_con_lim_sector.rds")
}

# all treatments


if(run_model) {
  
  all_con <- brm(outcome_norm ~ 0 + Intercept + connections + employees + years + own + 
                   country + sector + assets + combined_perf2,
                  family=ord_beta_reg,
                  data=combined_clean,
                  stanvars=stanvars,
                  prior=priors,
                 iter=1000,backend="cmdstanr",
                 chains=1,cores=8)
  
  saveRDS(all_con,"data/all.rds")
  
  all_con_man <- brm(outcome_norm ~ 0 + Intercept + connections + employees + years + own + 
                   country + sector + assets + combined_perf2,
                 family=ord_beta_reg,
                 data=filter(combined_clean,position=="Manager"),
                 stanvars=stanvars,
                 prior=priors,
                 iter=1000,backend="cmdstanr",
                 chains=1,threads =8)
  
  saveRDS(all_con_man,"data/all_man.rds")
  
  
  all_con_lim_sec <- brm(outcome_norm ~ 0 + Intercept + connections + employees + years + own + 
                       country + sector + assets + combined_perf2,
                     family=ord_beta_reg,
                     data=filter(combined_clean,
                                      !(sector_1 %in% c("HUMAN HEALTH AND SOCIAL WORK ACTIVITIES",
                                                        "ACTIVITIES OF EXTRATERRITORIAL ORGANISATIONS AND BODIES",
                                                        "ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS- AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN USE",
                                                        "PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY",
                                                        "PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES"))),
                     stanvars=stanvars,
                     prior=priors,
                     iter=1000,backend="cmdstanr",
                     chains=1,threads =8)
  
  saveRDS(all_con_lim_sec,"data/all_con_lim_sec.rds")
  
} else {
  all_con <- readRDS("data/all.rds")
  all_con_man <- readRDS("data/all_man.rds")
  all_con_lim_sec <- readRDS("data/all_con_lim_sec.rds")
}

# connected X country

if(run_model) {
  country_con <- brm(outcome_norm ~ 0 + Intercept + connected*country_resp + employees + years + own + country + sector + assets,
                  family=ord_beta_reg,
                  data=combined_clean,
                  stanvars=stanvars,
                  prior=priors,
                  iter=1000,backend="cmdstanr",
                  chains=1,threads =8)
  
  saveRDS(country_con,"data/base_country.rds")
  
  country_con_man <- brm(outcome_norm ~ 0 + Intercept + connected*country_resp + employees + years + own + country + sector + assets,
                     family=ord_beta_reg,
                     data=filter(combined_clean,position=="Manager"),
                     stanvars=stanvars,
                     prior=priors,
                     iter=1000,backend="cmdstanr",
                     chains=1,threads =8)
  
  saveRDS(country_con_man,"data/base_country_man.rds")
  
} else {
  country_con <- readRDS("data/base_country.rds")
  country_con_man <- readRDS("data/base_country_man.rds")
}

# connected X pol_help 

if(run_model) {
  pol_int_con <- brm(outcome_norm ~ 0 + Intercept + connected*pol_eff_1 + employees + years + own + country + sector + assets,
                     family=ord_beta_reg,
                     data=combined_clean,
                     stanvars=stanvars,
                     prior=priors,
                     iter=1000,backend="cmdstanr",
                     chains=1,threads =8)
  
  saveRDS(pol_int_con,"data/pol_int_con.rds")
  
  pol_int_con_man <- brm(outcome_norm ~ 0 + Intercept + connected*pol_eff_1 + employees + years + own + country + sector + assets,
                     family=ord_beta_reg,
                     data=filter(combined_clean,position=="Manager"),
                     stanvars=stanvars,
                     prior=priors,
                     iter=1000,backend="cmdstanr",
                     chains=1,threads =8)
  
  saveRDS(pol_int_con_man,"data/pol_int_con_man.rds")
  
} else {
  pol_int_con <- readRDS("data/pol_int_con.rds")
  pol_int_con_man <- readRDS("data/pol_int_con_man.rds")
}

# connected X connected

if(run_model) {
  pol_con_con <- brm(outcome_norm ~ 0 + Intercept + connected*pol_con_1 + employees + years + own + country + sector + assets,
                     family=ord_beta_reg,
                     data=combined_clean,
                     stanvars=stanvars,
                     prior=priors,
                     iter=1000,backend="cmdstanr",
                     chains=1,threads =8)
  
  saveRDS(pol_con_con,"data/pol_con_con.rds")
  
  pol_con_con_man <- brm(outcome_norm ~ 0 + Intercept + connected*pol_con_1 + employees + years + own + country + sector + assets,
                     family=ord_beta_reg,
                     data=filter(combined_clean,position=="Manager"),
                     stanvars=stanvars,
                     prior=priors,
                     iter=1000,backend="cmdstanr",
                     chains=1,threads =8)
  
  saveRDS(pol_con_con_man,"data/pol_con_con_man.rds")
  
} else {
  pol_con_con <- readRDS("data/pol_con_con.rds")
  pol_con_con_man <- readRDS("data/pol_con_con_man.rds")
}

# connected X respondent sector

if(run_model) {
  pol_con_sector <- brm(outcome_norm ~ 0 + Intercept + connected*sector_1 + employees + years + own + country + sector + assets,
                     family=ord_beta_reg,
                     data=combined_clean,
                     stanvars=stanvars,
                     prior=priors,
                     iter=1000,backend="cmdstanr",
                     chains=1,threads =8)
  
  saveRDS(pol_con_sector,"data/pol_con_sector.rds")
  
  pol_con_sector_man <- brm(outcome_norm ~ 0 + Intercept + connected*sector_1 + employees + years + own + country + sector + assets,
                        family=ord_beta_reg,
                        data=filter(combined_clean,position=="Manager"),
                        stanvars=stanvars,
                        prior=priors,
                        iter=1000,backend="cmdstanr",
                        chains=1,threads =8)
  
  saveRDS(pol_con_sector_man,"data/pol_con_sector_man.rds")
  
} else {
  pol_con_sector <- readRDS("data/pol_con_sector.rds")
  pol_con_sector_man <- readRDS("data/pol_con_sector_man.rds")
}

# connected X profits

if(run_model) {
  con_profit_updown <- brm(bf(outcome_norm ~ 0 + Intercept + connected*total_perf,
                       decomp = "QR"),
                     family=ord_beta_reg,
                     data=combined_clean,
                     stanvars=stanvars,
                     prior=priors,
                     iter=1000,backend="cmdstanr",
                     chains=1,threads =8)
  
  saveRDS(con_profit_updown,"data/con_profit_updown.rds")
  
  con_profit_updown_man <- brm(bf(outcome_norm ~ 0 + Intercept + connected*total_perf,
                              decomp = "QR"),
                           family=ord_beta_reg,
                           data=filter(combined_clean,position=="Manager"),
                           stanvars=stanvars,
                           prior=priors,
                           iter=1000,backend="cmdstanr",
                           chains=1,threads =8)
  
  saveRDS(con_profit_updown_man,"data/con_profit_updown_man.rds")
  
} else {
  con_profit_updown <- readRDS("data/con_profit_updown.rds")
  con_profit_updown_man <- readRDS("data/con_profit_updown_man.rds")
}

if(run_model) {
  con_profit_lm <- brm(bf(outcome_norm ~ 0 + Intercept + connected*squared + connected*linear,
                              decomp = "QR"),
                           family=ord_beta_reg,
                           data=combined_clean,
                           stanvars=stanvars,
                           prior=priors,
                       iter=1000,backend="cmdstanr",
                       chains=1,threads =8)
  
  saveRDS(con_profit_lm,"data/con_profit_lm.rds")
  
  con_profit_lm_man <- brm(bf(outcome_norm ~ 0 + Intercept + connected*squared + connected*linear,
                          decomp = "QR"),
                       family=ord_beta_reg,
                       data=filter(combined_clean,position=="Manager"),
                       stanvars=stanvars,
                       prior=priors,
                       iter=1000,backend="cmdstanr",
                       chains=1,threads =8)
  
  saveRDS(con_profit_lm_man,"data/con_profit_lm_man.rds")
  
} else {
  con_profit_lm <- readRDS("data/con_profit_lm.rds")
  con_profit_lm_man <- readRDS("data/con_profit_lm_man.rds")
}

# profit as ordinal category

if(run_model) {
  con_profit_ord <- brm(bf(outcome_norm ~ 0 + Intercept + connected*mo(perf),
                          decomp = "QR"),
                       family=ord_beta_reg,
                       data=combined_clean,
                       stanvars=stanvars,
                       prior=priors,
                       iter=1000,backend="cmdstanr",
                       chains=1,threads =8)
  
  saveRDS(con_profit_ord,"data/con_profit_ord.rds")
  
  con_profit_ord_man <- brm(bf(outcome_norm ~ 0 + Intercept + connected*mo(perf),
                           decomp = "QR"),
                        family=ord_beta_reg,
                        data=filter(combined_clean,position=="Manager"),
                        stanvars=stanvars,
                        prior=priors,
                        iter=1000,backend="cmdstanr",
                        chains=1,threads =8)
  
  saveRDS(con_profit_ord_man,"data/con_profit_ord_man.rds")
  
} else {
  con_profit_ord <- readRDS("data/con_profit_ord.rds")
  con_profit_ord_man <- readRDS("data/con_profit_ord_man.rds")
}

# tfp

if(run_model) {
  tfp_full <- brm(bf(outcome_norm ~ 0 + Intercept + s(tfp,by=connected)),
                        family=ord_beta_reg,
                        data=tfp_data,
                        stanvars=stanvars,
                        prior=priors,
                  iter=1000,backend="cmdstanr",
                  chains=1,threads =8)
  
  saveRDS(tfp_full,"data/tfp_full.rds")
  
  tfp_full_man <- brm(bf(outcome_norm ~ 0 + Intercept + s(tfp,by=connected)),
                  family=ord_beta_reg,
                  data=filter(tfp_data,position=="Manager"),
                  stanvars=stanvars,
                  prior=priors,
                  iter=1000,backend="cmdstanr",
                  chains=1,threads =8)
  
  saveRDS(tfp_full_man,"data/tfp_full_man.rds")
  
} else {
  tfp_full <- readRDS("data/tfp_full.rds")
  tfp_full_man <- readRDS("data/tfp_full_man.rds")
}

# tfp with polynomials

if(run_model) {
  # tfp_full_poly <- brm(bf(outcome_norm ~ 0 + Intercept + connected*tfp*poly(years,3),
  #                         decomp="QR"),
  #                 family=ord_beta_reg,
  #                 data=tfp_data,
  #                 stanvars=stanvars,
  #                 prior=priors,
  #                 iter=1000,
  #                 chains=1,cores=1)
  
  #saveRDS(tfp_full_poly,"data/tfp_full_poly.rds")
} else {
  #tfp_full_poly <- readRDS("data/tfp_full_poly.rds")
}

# sales

if(run_model) {
  profit_full <- brm(bf(outcome_norm ~ 0 + Intercept + s(sales_series,by=connected)),
                  family=ord_beta_reg,
                  data=tfp_data,
                  stanvars=stanvars,
                  prior=priors,
                  iter=1000,backend="cmdstanr",
                  chains=1,threads =8)
  
  saveRDS(profit_full,"data/profit_full.rds")
  
  profit_full_man <- brm(bf(outcome_norm ~ 0 + Intercept + s(sales_series,by=connected)),
                     family=ord_beta_reg,
                     data=filter(tfp_data,position=="Manager"),
                     stanvars=stanvars,
                     prior=priors,
                     iter=1000,backend="cmdstanr",
                     chains=1,threads =8)
  
  saveRDS(profit_full_man,"data/profit_full_man.rds")
  
} else {
  profit_full <- readRDS("data/profit_full.rds")
  profit_full_man <- readRDS("data/profit_full_man.rds")
}





# gender
if(run_model) {
  gender <- brm(outcome_norm ~ 0 + Intercept + gender + employees + years + own + Duration + position,
                family=ord_beta_reg,
                data=combined_clean,
                stanvars=stanvars,
                prior=priors,
                iter=1000,backend="cmdstanr",
                chains=1,threads =8)
  
  saveRDS(gender ,"data/gender1.rds")
  
  
} else {
  gender <- readRDS("data/gender1.rds")
}


# gender by country
# nothing to see here

if(run_model) {
  gender_country <- brm(outcome_norm ~ 0 + Intercept + gender*country_resp + employees + years + own + Duration + position,
                        family=ord_beta_reg,
                        data=combined_clean,
                        stanvars=stanvars,
                        prior=priors,
                        iter=1000,backend="cmdstanr",
                        chains=1,threads =8)
  
  saveRDS(gender_country ,"data/gender_country.rds")
} else {
  gender_country <- readRDS("data/gender_country.rds")
}


# gender by performance

if(run_model) {
  gender_combined_perf_conn <- brm(outcome_norm ~ 0 + Intercept + gender*mo(combined_perf) + employees + years + own + Duration + position,
                                   family=ord_beta_reg,
                                   data=combined_clean,
                                   stanvars=stanvars,
                                   prior=priors,
                                   iter=1000,backend="cmdstanr",
                                   chains=1,threads =8)
  
  saveRDS(gender_combined_perf_conn  ,"data/gender_combined_perf_conn.rds")
} else {
  gender_combined_perf_conn  <- readRDS("data/gender_combined_perf_conn.rds")
}

# connections and expropriation threats

if(run_model) {
  con_exprop <- brm(bf(outcome_norm ~ 0 + Intercept + connected*sum_exprop,
                           decomp = "QR"),
                        family=ord_beta_reg,
                        data=combined_clean,
                        stanvars=stanvars,
                        prior=priors,
                    iter=1000,backend="cmdstanr",
                    chains=1,threads =8)
  
  saveRDS(con_exprop,"data/con_exprop.rds")
  
  con_exprop_man <- brm(bf(outcome_norm ~ 0 + Intercept + connected*sum_exprop,
                       decomp = "QR"),
                    family=ord_beta_reg,
                    data=filter(combined_clean,position=="Manager"),
                    stanvars=stanvars,
                    prior=priors,
                    iter=1000,backend="cmdstanr",
                    chains=1,threads =8)
  
  saveRDS(con_exprop_man,"data/con_exprop_man.rds")
  
} else {
  con_exprop<- readRDS("data/con_exprop.rds")
  con_exprop_man <- readRDS("data/con_exprop_man.rds")
}

# ordinal expropriation risk measure

if(run_model) {
  con_exprop_ord <- brm(bf(outcome_norm ~ 0 + Intercept + connected*man_belief_2,
                       decomp = "QR"),
                    family=ord_beta_reg,
                    data=combined_clean,
                    stanvars=stanvars,
                    prior=priors,
                    iter=1000,backend="cmdstanr",
                    chains=1,threads =8)
  
  saveRDS(con_exprop_ord,"data/con_exprop_ord.rds")
  
  con_exprop_ord_man <- brm(bf(outcome_norm ~ 0 + Intercept + connected*man_belief_2,
                           decomp = "QR"),
                        family=ord_beta_reg,
                        data=filter(combined_clean,position=="Manager"),
                        stanvars=stanvars,
                        prior=priors,
                        iter=1000,backend="cmdstanr",
                        chains=1,threads =8)
  
  saveRDS(con_exprop_ord_man,"data/con_exprop_ord_man.rds")
  
} else {
  con_exprop_ord <- readRDS("data/con_exprop_ord.rds")
  con_exprop_ord_man <- readRDS("data/con_exprop_ord_man.rds")
}

# run an enforcement/connections mediation model
# with an interaction - kind of weird but whatevs
if(run_model) {
  library(ordbetareg)
  library(bayestestR)
  m1 <- bf(sum_exprop_m ~ 0 + Intercept + pol_con_1,
           decomp = "QR",family=ord_beta_reg)
  m2 <- bf(outcome_norm ~ 0 + Intercept + sum_exprop_m + pol_con_1,
           decomp = "QR",family=ord_beta_reg)
  
  combined_clean$sum_exprop_m <- ((combined_clean$sum_exprop) - min(combined_clean$sum_exprop,na.rm=T))/(max(combined_clean$sum_exprop,na.rm=T) - min(combined_clean$sum_exprop,na.rm=T))
  
  combined_clean <- mutate(combined_clean, 
         int_exprop=sum_exprop_m * as.numeric(connected=="treatment"),
         int_resp_con=pol_con_1 * as.numeric(connected=="treatment"))
  
  # con_exprop_med <- brm(m1 + m2 + set_rescor(FALSE),
  #                       data=combined_clean,
  #                       stanvars=stanvars,
  #                       iter=1000,threads=threading(4),
  #                       chains=1,cores=1)
  
  # other approach - calculate control and treatment interaction effects seperately
  
  con_med_control <- brm(m1 + m2 + set_rescor(FALSE),
                         data=filter(combined_clean, connected!="treatment"),
                         stanvars=stanvars,
                         iter=1000,threads=8,backend="cmdstanr",
                         chains=1)
  
  con_med_treat <- brm(m1 + m2 + set_rescor(FALSE),
                         data=filter(combined_clean, connected=="treatment"),
                         stanvars=stanvars,
                         iter=1000,backend="cmdstanr",
                       chains=1,cores=1,threads =8)
  
  saveRDS(con_med_control,"data/con_med_control.rds")
  saveRDS(con_med_treat,"data/con_med_treat.rds")
  
} else {
  con_med_control <- readRDS("data/con_med_control.rds")
  con_med_treat <- readRDS("data/con_med_treat.rds")
}

# foreign vs domestic

# ordinal expropriation risk measure

combined_clean$foreign <- ifelse(combined_clean$country %in% c("Egypt","Venezuela","Ukraine"),
                                 "Domestic",
                                 "Foreign")

if(run_model) {
  con_foreign <- brm(bf(outcome_norm ~ 0 + Intercept + connected*registration,
                           decomp = "QR"),
                        family=ord_beta_reg,
                        data=combined_clean,
                        stanvars=stanvars,
                        prior=priors,
                        iter=1000,threads=8,backend="cmdstanr",
                     chains=1,cores=1)
  
  saveRDS(con_foreign,"data/con_foreign.rds")
  
  con_foreign_man <- brm(bf(outcome_norm ~ 0 + Intercept + connected*registration,
                               decomp = "QR"),
                            family=ord_beta_reg,
                            data=filter(combined_clean,position=="Manager"),
                            stanvars=stanvars,
                            prior=priors,
                            iter=1000,
                         threads=8,backend="cmdstanr",
                         chains=1)
  
  saveRDS(con_foreign_man,"data/con_foreign_man.rds")
  
} else {
  con_foreign <- readRDS("data/con_foreign.rds")
  con_foreign_man <- readRDS("data/con_foreign_man.rds")
}


# Export tables -----------------------------------------------------------



require(kableExtra)

# report sector interactions -- table form (for appendix)

sum1 <- summary(pol_con_sector)$fixed %>% mutate(Sample="Full")
sum2 <- summary(pol_con_sector_man)$fixed %>% mutate(Sample="Managers")

sector_table <- bind_rows(sum1, sum2) %>% 
  mutate(Sector=c(row.names(sum1),
                  row.names(sum2))) %>% 
  filter(grepl(x=Sector, pattern=":")) %>% 
  mutate(Sector=stringr::str_extract(string = Sector,
                                     "(?<=1).+"),
         Sector=fct_recode(Sector,
           "Embassies" = "ACTIVITIESOFEXTRATERRITORIALORGANISATIONSANDBODIES",
           "Household" = "ACTIVITIESOFHOUSEHOLDSASEMPLOYERS;UNDIFFERENTIATEDGOODSMANDSERVICESMPRODUCINGACTIVITIESOFHOUSEHOLDSFOROWNUSE",
           "Business Support" = "ADMINISTRATIVEANDSUPPORTSERVICEACTIVITIES",
           "Agriculture" = "AGRICULTUREFORESTRYANDFISHING",
           "Arts" = "ARTSENTERTAINMENTANDRECREATION",
           "Construction" = "CONSTRUCTION",
           "Education" = "EDUCATION",
           "Utilities" = "ELECTRICITYGASSTEAMANDAIRCONDITIONINGSUPPLY",
           "Finance" = "FINANCIALANDINSURANCEACTIVITIES",
           "Health Care" = "HUMANHEALTHANDSOCIALWORKACTIVITIES",
           "ICT" = "INFORMATIONANDCOMMUNICATION",
           "Manufacturing" = "MANUFACTURING",
           "Mining" = "MININGANDQUARRYING",
           "Other Service" = "OTHERSERVICEACTIVITIES",
           "Professional" = "PROFESSIONALSCIENTIFICANDTECHNICALACTIVITIES",
           "Government" = "PUBLICADMINISTRATIONANDDEFENCE;COMPULSORYSOCIALSECURITY",
           "Real Estate" = "REALESTATEACTIVITIES",
           "Transportation" = "TRANSPORTATIONANDSTORAGE",
           "Water" = "WATERSUPPLY;SEWERAGEWASTEMANAGEMENTANDREMEDIATIONACTIVITIES",
           "Retail" = "WHOLESALEANDRETAILTRADE;REPAIROFMOTORVEHICLESANDMOTORCYCLES"
         ))

sector_table %>% 
  arrange(Sector, Sample) %>% 
  select(Sector, Sample, `5%`="l-95% CI",Estimate,
         `95%`="u-95% CI") %>% 
  # mutate(Estimate=ifelse(sign(`5%`)==sign(`95%`),
  #                             paste0(round(Estimate,3),"*"),
  #                             as.character(round(Estimate,3)))) %>% 
  kable(digits=3,row.names = F,format = "latex",
        caption="Coefficients for Connection Treatment Subset by Sector",
        label="coefsec") %>% 
  kable_styling(latex_options=c("hold_position",
                                "striped"),
                font_size=9,
                full_width = T) %>% 
  add_footnote("Estimates are means and quantiles of the empirical posterior distribution. Stars indicate estimates where the posterior interval does not overlap with zero.",
               threeparttable = T) %>% 
  save_kable("sector_table.tex")

# export foreign firms sector analysis

modelsummary(list(`Full Sample`=con_foreign,
                  `Managers Only`=con_foreign_man),metrics="RMSE",
             statistic="conf.int",label="foreign",
             coef_map=c("b_Intercept"="Intercept",
                        "b_connectedtreatment"="Treatment",
                        "b_registrationForeign"="Foreign",
                        "b_connectedtreatment:registrationForeign"="TreatmentXForeign",
                        "b_connectedtreatment:pol_con_1"="TreatmentXConnections",
                        "b_registrationForeign:pol_con_1"="ForeignXConnections",
                        "b_connectedtreatment:registrationForeign:pol_con_1"="TreatmentXForeignXConnections",
                        "phi"),output="foreign_table.tex",
             title = "Estimates of ATE Subset by Foreign and Domestic Companies and Political Connections") %>% 
  kable_styling(latex_options=c("hold_position",
                                "striped"),
                font_size=9,
                full_width = T) %>% 
  add_footnote("Estimates are means and quantiles of the empirical posterior distribution.",
               threeparttable = T) %>% 
  save_kable("foreign_table.tex")
  
  

# make a mediation table

# get mediation outcomes

med_cont <- as.data.frame(mediation(con_med_control, treatment="pol_con_1"))
med_treat <- as.data.frame(mediation(con_med_treat, treatment="pol_con_1"))

# calculate ATE as treat - control

library(tidyr)
library(kableExtra)

combine_tab <- tibble(`Total Effect`=med_treat$effect_total - med_cont$effect_total,
                      `Indirect Effect`=med_treat$effect_indirect - med_cont$effect_indirect,
                      `Direct Effect`=med_treat$effect_direct - med_cont$effect_direct) %>% 
            mutate(`% Mediated`=`Indirect Effect`/`Total Effect`,
                   iter=1:n()) %>% 
          gather(key="Estimand",value="draw",-iter) %>% 
  group_by(Estimand) %>% 
  summarize(Estimate=median(draw),
            `5% Low` = quantile(draw,.05),
            `95% High`=quantile(draw,.95))

combine_tab %>% 
  kable(caption="Mediation Analysis of Expropriation Threats and Respondent Political Connections",
        label="mediation",format = "latex",digits=3,booktabs=T) %>% 
  kable_styling(latex_options = "hold_position") %>% 
  add_footnote("Respondent political connections are the prior causal variable and the sum of expropriation threats is the mediator. Outcome is investment share in a given company.") %>% 
  save_kable("med_table.tex")


# Export plots ------------------------------------------------------------


# do some coef plots

require(tidybayes)
require(ggthemes)

all_con_res <- all_con %>% 
  gather_draws(`b_connect.*`,regex=T) %>% 
  median_qi %>% 
  mutate(variable_rec = fct_recode(.variable,
                                                "Daughter of President" = "b_connectionsboard_daughter_pres",
                                                "Mid-Level Official\non Board" = "b_connectionsboard_med_crat",
                                                "Minister\non Board" = "b_connectionsboard_minister",
                                                "MP\non Board" = "b_connectionsboard_mp",
                                                "Son of President" = "b_connectionsboard_son_pres",
                                                "Army\nGeneral" = "b_connectionsown_gen_army",
                                                "MP\nOwner" = "b_connectionsown_mp",
                                                "PM Nephew\nOwner" = "b_connectionsown_pm_nephew",
                                                "PM Niece\nOwner" = "b_connectionsown_pm_niece",
                                                "Police\nOwner" = "b_connectionsown_police",
                                                "President Classmate\nOwner" = "b_connectionsown_pres_classmate",
                                                "President\nDaughter In-law" = "b_connectionsown_pres_daughter_inlaw",
                                                "Member of\nPresident Party" = "b_connectionsown_pres_party",
                                                "President\nSon In-law" = "b_connectionsown_pres_son_inlaw"
  ))

all_con_res %>% 
  ggplot(aes(y=.value,x=reorder(variable_rec,.value))) +
  geom_pointrange(aes(ymin=.lower,
                      ymax=.upper),fatten=2) +
  geom_text(aes(label=round(.value,digits=2)),vjust=-.75,size=3) +
  theme_tufte(base_family="") +
  coord_flip() +
  geom_hline(yintercept=0,linetype=2) +
  labs(y="Logit Coefficient",x="",
       caption=stringr::str_wrap("Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution."))

ggsave("all_con.png")

all_con_man_res <- all_con_man %>% 
  gather_draws(`b_connect.*`,regex=T) %>% 
  median_qi %>% 
  mutate(variable_rec = fct_recode(.variable,
                                   "Daughter of President" = "b_connectionsboard_daughter_pres",
                                   "Mid-Level Official\non Board" = "b_connectionsboard_med_crat",
                                   "Minister\non Board" = "b_connectionsboard_minister",
                                   "MP\non Board" = "b_connectionsboard_mp",
                                   "Son of President" = "b_connectionsboard_son_pres",
                                   "Army\nGeneral" = "b_connectionsown_gen_army",
                                   "MP\nOwner" = "b_connectionsown_mp",
                                   "PM Nephew\nOwner" = "b_connectionsown_pm_nephew",
                                   "PM Niece\nOwner" = "b_connectionsown_pm_niece",
                                   "Police\nOwner" = "b_connectionsown_police",
                                   "President Classmate\nOwner" = "b_connectionsown_pres_classmate",
                                   "President\nDaughter In-law" = "b_connectionsown_pres_daughter_inlaw",
                                   "Member of\nPresident Party" = "b_connectionsown_pres_party",
                                   "President\nSon In-law" = "b_connectionsown_pres_son_inlaw"
  ))

all_con_man_res %>% 
  ggplot(aes(y=.value,x=reorder(variable_rec,.value))) +
  geom_pointrange(aes(ymin=.lower,
                      ymax=.upper),fatten=2) +
  geom_text(aes(label=round(.value,digits=2)),vjust=-.75,size=3) +
  theme_tufte(base_family = "") +
  coord_flip() +
  geom_hline(yintercept=0,linetype=2) +
  labs(y="Logit Coefficient",x="",
       caption=stringr::str_wrap("Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution."))

ggsave("all_con_man.png")

all_con_lim_sec_reg <- all_con_lim_sec %>% 
  gather_draws(`b_connect.*`,regex=T) %>% 
  median_qi %>% 
  mutate(variable_rec = fct_recode(.variable,
                                   "Daughter of President" = "b_connectionsboard_daughter_pres",
                                   "Mid-Level Official\non Board" = "b_connectionsboard_med_crat",
                                   "Minister\non Board" = "b_connectionsboard_minister",
                                   "MP\non Board" = "b_connectionsboard_mp",
                                   "Son of President" = "b_connectionsboard_son_pres",
                                   "Army\nGeneral" = "b_connectionsown_gen_army",
                                   "MP\nOwner" = "b_connectionsown_mp",
                                   "PM Nephew\nOwner" = "b_connectionsown_pm_nephew",
                                   "PM Niece\nOwner" = "b_connectionsown_pm_niece",
                                   "Police\nOwner" = "b_connectionsown_police",
                                   "President Classmate\nOwner" = "b_connectionsown_pres_classmate",
                                   "President\nDaughter In-law" = "b_connectionsown_pres_daughter_inlaw",
                                   "Member of\nPresident Party" = "b_connectionsown_pres_party",
                                   "President\nSon In-law" = "b_connectionsown_pres_son_inlaw"
  ))

all_con_lim_sec_reg %>% 
  ggplot(aes(y=.value,x=reorder(variable_rec,.value))) +
  geom_pointrange(aes(ymin=.lower,
                      ymax=.upper),fatten=2) +
  geom_text(aes(label=round(.value,digits=2)),vjust=-.75,size=3) +
  theme_tufte(base_family = "") +
  coord_flip() +
  geom_hline(yintercept=0,linetype=2) +
  labs(y="Logit Coefficient",x="",
       caption=stringr::str_wrap("Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution."))

ggsave("all_con_lim_sec.png")

# compare all three types

bind_rows(list(All=all_con_res,
               Managers=all_con_man_res,
               Sectors=all_con_lim_sec_reg),
          .id="Model") %>% 
  ggplot(aes(y=.value,x=reorder(variable_rec,.value))) +
  geom_pointrange(aes(ymin=.lower,
                      ymax=.upper,
                      colour=Model,
                      shape=Model,
                      linetype=Model),
                  position=position_dodge(.5)) +
  theme_tufte(base_family = "") +
  coord_flip() +
  geom_hline(yintercept=0,linetype=2) +
  labs(y="Logit Coefficient",x="",
       caption=stringr::str_wrap("Political connection treatments from different samples (All, Managers only, excluding questionable Sectors) are collapsed to a single connected vs. un-connected binary treatment. Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution."))

ggsave("compare_samples_all.png",width=5,height=4,scale=1.3)

# output to latex for inclusion in paper

# texreg(all_con,file="all_con.tex",booktabs = T,
#        omit.coef="ntercept",float.pos="H",
#        caption.above = T,threeparttable = T,use.packages = F,label="allcon",
#        dcolumn = T,caption="ATEs for All Political Connection Treatments",scalebox=.5,
#        longtable = T,
#        custom.coef.names=c("Intercept",
#                            "Daughter of President",
#                            "Mid-level Bureaucrat on Board",
#                            "Cabinet Minister on Board",
#                            "MP on Board",
#                            "Son of President on Board",
#                            "Owner is Army General",
#                            "Owner is an MP",
#                            "Owner is Nephew of PM",
#                            "Owner is Niece of PM",
#                            "Owner is Police Officer",
#                            "Owner is President's Classmate",
#                            "Owner is President's Daughter-in-law",
#                            "Owner belongs to President's Party",
#                            "Owner is President's Son in Law",
#                            "No. Employees",
#                            "Years Operation",
#                            "Private",
#                            "SOE",
#                            "Germany",
#                            "Saudi Arabia",
#                            "China",
#                            "Korea",
#                            "Egypt",
#                            "USA",
#                            "Japan",
#                            "Russia",
#                            "Ukraine",
#                            "Venezuela",
#                            "Construction",
#                            "Energy",
#                            "Financials",
#                            "Manufacturing",
#                            "Retail",
#                            "Telecom",
#                            "Assets",
#                            "Profit"))

base_con_reg <- base_con %>% 
  recover_types(combined_clean) %>% 
  gather_draws(`b_.*`,regex=T) %>% 
  median_qi %>% 
  mutate(var_rec=fct_recode(.variable,
                            "Very Profitable"="b_combined_perf2gain_much",
                            "Somewhat Profitable"="b_combined_perf2gain_some",
                            "Break Even"="b_combined_perf2break_even",
                            "Lose Some"="b_combined_perf2lose_some",
                            "Assets" = "b_assets",
                            "Connected" = "b_connectedtreatment",
                            "China" = "b_countryChina",
                            "Egypt" = "b_countryEgypt",
                            "Germany" = "b_countryGermany",
                            "Japan" = "b_countryJapan",
                            "Korea" = "b_countryKorea",
                            "Russia" = "b_countryRussia",
                            "Saudi" = "b_countrySaudi",
                            "Ukraine" = "b_countryUkraine",
                            "USA" = "b_countryUSA",
                            "Venezuela" = "b_countryVenezuela",
                            "Employees" = "b_employees",
                            "Intercept" = "b_Intercept",
                            "Private" = "b_ownprivate",
                            "SOE" = "b_ownsoe",
                            "Construction" = "b_sectorconstruction",
                            "Energy" = "b_sectorenergy",
                            "Financials" = "b_sectorfinancials",
                            "Manufacturing" = "b_sectormanufacturing",
                            "Retail" = "b_sectorretail",
                            "Telecom" = "b_sectortelecom",
                            "Years" = "b_years"
  ))

base_con_reg %>% 
  filter(var_rec!="Intercept") %>%
  ggplot(aes(y=.value,x=reorder(var_rec,.value))) +
  geom_pointrange(aes(ymin=.lower,
                      ymax=.upper)) +
  theme_tufte(base_family="") +
  coord_flip() +
  geom_hline(yintercept=0,linetype=2) +
  labs(y="Logit Coefficient",x="",
       caption=stringr::str_wrap("Political connection treatments are collapsed to a single connected vs. un-connected binary treatment. Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution."))

ggsave("base_mod.png",scale=0.7)

# manager only sample

base_con_man_reg <- base_con_man %>% 
  recover_types(combined_clean) %>% 
  gather_draws(`b_.*`,regex=T) %>% 
  median_qi %>% 
  mutate(var_rec=fct_recode(.variable,
                            "Very Profitable"="b_combined_perf2gain_much",
                            "Somewhat Profitable"="b_combined_perf2gain_some",
                            "Break Even"="b_combined_perf2break_even",
                            "Lose Some"="b_combined_perf2lose_some",
                            "Assets" = "b_assets",
                            "Connected" = "b_connectedtreatment",
                            "China" = "b_countryChina",
                            "Egypt" = "b_countryEgypt",
                            "Germany" = "b_countryGermany",
                            "Japan" = "b_countryJapan",
                            "Korea" = "b_countryKorea",
                            "Russia" = "b_countryRussia",
                            "Saudi" = "b_countrySaudi",
                            "Ukraine" = "b_countryUkraine",
                            "USA" = "b_countryUSA",
                            "Venezuela" = "b_countryVenezuela",
                            "Employees" = "b_employees",
                            "Intercept" = "b_Intercept",
                            "Private" = "b_ownprivate",
                            "SOE" = "b_ownsoe",
                            "Construction" = "b_sectorconstruction",
                            "Energy" = "b_sectorenergy",
                            "Financials" = "b_sectorfinancials",
                            "Manufacturing" = "b_sectormanufacturing",
                            "Retail" = "b_sectorretail",
                            "Telecom" = "b_sectortelecom",
                            "Years" = "b_years"
  ))

base_con_man_reg %>% 
  filter(var_rec!="Intercept") %>%
  ggplot(aes(y=.value,x=reorder(var_rec,.value))) +
  geom_pointrange(aes(ymin=.lower,
                      ymax=.upper)) +
  theme_tufte(base_family="") +
  coord_flip() +
  geom_hline(yintercept=0,linetype=2) +
  labs(y="Logit Coefficient",x="",
       caption=stringr::str_wrap("Political connection treatments are collapsed to a single connected vs. un-connected binary treatment. Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution."))

ggsave("base_man_mod.png",scale=0.7)

# sample excluding marginal sectors

# manager only sample

base_con_lim_sector_reg <- base_con_lim_sector %>% 
  recover_types(combined_clean) %>% 
  gather_draws(`b_.*`,regex=T) %>% 
  median_qi %>% 
  mutate(var_rec=fct_recode(.variable,
                            "Very Profitable"="b_combined_perf2gain_much",
                            "Somewhat Profitable"="b_combined_perf2gain_some",
                            "Break Even"="b_combined_perf2break_even",
                            "Lose Some"="b_combined_perf2lose_some",
                            "Assets" = "b_assets",
                            "Connected" = "b_connectedtreatment",
                            "China" = "b_countryChina",
                            "Egypt" = "b_countryEgypt",
                            "Germany" = "b_countryGermany",
                            "Japan" = "b_countryJapan",
                            "Korea" = "b_countryKorea",
                            "Russia" = "b_countryRussia",
                            "Saudi" = "b_countrySaudi",
                            "Ukraine" = "b_countryUkraine",
                            "USA" = "b_countryUSA",
                            "Venezuela" = "b_countryVenezuela",
                            "Employees" = "b_employees",
                            "Intercept" = "b_Intercept",
                            "Private" = "b_ownprivate",
                            "SOE" = "b_ownsoe",
                            "Construction" = "b_sectorconstruction",
                            "Energy" = "b_sectorenergy",
                            "Financials" = "b_sectorfinancials",
                            "Manufacturing" = "b_sectormanufacturing",
                            "Retail" = "b_sectorretail",
                            "Telecom" = "b_sectortelecom",
                            "Years" = "b_years"
  ))

base_con_lim_sector_reg %>% 
  filter(var_rec!="Intercept") %>%
  ggplot(aes(y=.value,x=reorder(var_rec,.value))) +
  geom_pointrange(aes(ymin=.lower,
                      ymax=.upper)) +
  theme_tufte(base_family = "") +
  coord_flip() +
  geom_hline(yintercept=0,linetype=2) +
  labs(y="Logit Coefficient",x="",
       caption=stringr::str_wrap("Political connection treatments are collapsed to a single connected vs. un-connected binary treatment. Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution."))

ggsave("base_lim_sec.png",scale=0.7)

# combine estimates from all three

bind_rows(list(All=base_con_reg,
               Managers=base_con_man_reg,
               Sectors=base_con_lim_sector_reg),
          .id="Model") %>% 
  filter(var_rec=="Connected") %>% 
  ggplot(aes(y=.value,x=reorder(Model,.value))) +
  geom_pointrange(aes(ymin=.lower,
                      ymax=.upper)) +
  theme_tufte(base_family = "") +
  coord_flip() +
  geom_hline(yintercept=0,linetype=2) +
  labs(y="Logit Coefficient",x="",
       caption=stringr::str_wrap("Political connection treatments from different samples (All, Managers only, excluding questionable Sectors) are collapsed to a single connected vs. un-connected binary treatment. Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution."))

ggsave("compare_samples.png",width=4,height=3,scale=1.3)

# let's get conditional effects for subgroup analysis

country_con_int <- conditional_effects(country_con,"country_resp:connected")[[1]] %>% 
   mutate(effect1=fct_recode(effect1__,"Venezuela" = "Venezuela, Bolivarian Republic of..."),
          effect2=fct_recode(effect2__,
                             "Treatment" = "treatment",
                             "Control" = "control"
          ))

country_con_int %>% 
  ggplot(aes(y=estimate__,x=effect1)) +
  geom_pointrange(aes(ymin=lower__,
                      ymax=upper__,
                      colour=effect2,
                      shape=effect2),
                  position=position_dodge(width=.5)) +
  theme_tufte(base_family="") +
  scale_colour_viridis_d() +
  labs(caption=stringr::str_wrap("Plot shows political connection treatment interacted with country intercepts. Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution."),
       x="",y="Predicted Investment Proportion") +
  scale_y_continuous(labels=scales::percent_format(accuracy = 1)) +
  guides(colour=guide_legend(title=""),
         shape=guide_legend(title="")) +
  theme(legend.position = "top")

ggsave("country_int.png",scale=0.7)

pol_int_res <- conditional_effects(pol_int_con,"connected:pol_eff_1",
                                                  int_conditions=list(pol_eff_1=seq(from=-1.5,to=1.5,length.out=100)))[[1]]
require(patchwork)


p1 <- pol_int_res %>% 
    mutate(connected=str_to_sentence(connected)) %>% 
  ggplot(aes(y=estimate__,x=as.numeric(as.character(pol_eff_1)))) +
  geom_ribbon(aes(ymin=lower__,ymax=upper__,fill=connected),alpha=0.5) +
  geom_line(aes(colour=connected)) +
  scale_colour_viridis_d() +
  scale_fill_viridis_d() +
  ggtitle("Level of Connections") +
  theme_tufte(base_family="") +
  guides(colour=guide_legend(title=""),
         fill=guide_legend(title="")) +
  labs(x="",y="Predicted Investment Proportion") +
  theme(legend.position = "top")

pol_con_res <- conditional_effects(pol_con_con,"connected:pol_con_1",
                                                  int_conditions=list(pol_con_1=seq(from=-1.3,to=1.6,length.out=100)))[[1]]


p2 <- pol_con_res %>% 
  mutate(connected=str_to_sentence(connected)) %>% 
  ggplot(aes(y=estimate__,x=as.numeric(as.character(pol_con_1)))) +
  geom_ribbon(aes(ymin=lower__,ymax=upper__,fill=connected),alpha=0.5) +
  geom_line(aes(colour=connected)) +
  ggtitle("Efficacy of Connections") +
  scale_colour_viridis_d() +
  scale_fill_viridis_d() +
  theme_tufte(base_family="") +
  guides(colour=guide_legend(title=""),
         fill=guide_legend(title="")) +
  labs(x="",y="") +
  theme(legend.position = "top")

p1 + p2 + plot_layout(guides="collect") +
  plot_annotation(tag_levels = "A",
                  caption=stringr::str_wrap("Plot shows political connection treatment interacted with the respondent's political connections (A) and the efficacy of those connections (B). Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution.",
                                            width=75)) &
  theme(legend.position = "bottom")

ggsave("pol_con_int.png",scale=0.7)

# produce conditional effects for splines

prof_splines <- conditional_effects(tfp_full,c("tfp","tfp:connected"))

# make a plot for overall tfp

tfp_spline <- prof_splines$tfp %>% 
  ggplot(aes(y=estimate__,x=tfp)) +
  geom_ribbon(aes(ymin=lower__,ymax=upper__),fill="blue",alpha=0.5) +
  geom_line(linetype=2) +
  theme_tufte(base_family="") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  labs(x="Investment TFP",y="Proportion Invested") +
  geom_hline(yintercept=0.5,linetype=3)

tfp_treat <- prof_splines$`tfp:connected` %>% 
  mutate(connected=stringr::str_to_sentence(connected)) %>% 
  ggplot(aes(y=estimate__,x=tfp)) +
  geom_ribbon(aes(ymin=lower__,ymax=upper__,
                  fill=connected),alpha=0.5) +
  geom_line(linetype=2,aes(group=connected)) +
  scale_fill_viridis_d() +
  theme_tufte(base_family="") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  labs(x="Investment TFP",y="Proportion Invested") +
       # caption=stringr::str_wrap("Plot shows marginal effect at the sample mean of TFP on respondent choosing a profile (A) and TFP interacted with political connection treatment (B). Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
       #                           the intervals are 5% to 95% quantiles of the empirical posterior distribution.",
       #                           width=70)) +
  guides(fill=guide_legend(title="",label.position="bottom")) +
  theme(legend.position = "top") +
  geom_hline(yintercept=0.5,linetype=3)

tfp_treat

ggsave("spline_tfp.png",scale=0.7)

profit_splines <- conditional_effects(profit_full,c("sales_series:connected"))

profit_treat <- profit_splines$`sales_series:connected` %>% 
  mutate(connected=stringr::str_to_sentence(connected)) %>% 
  ggplot(aes(y=estimate__,x=sales_series)) +
  geom_ribbon(aes(ymin=lower__,ymax=upper__,
                  fill=connected),alpha=0.5) +
  geom_line(linetype=2,aes(group=connected)) +
  scale_fill_viridis_d() +
  theme_tufte(base_family="") +
  scale_y_continuous(labels=scales::percent_format(accuracy=1)) +
  scale_x_continuous(labels=scales::dollar_format(accuracy=1)) +
  labs(x="Investment Profit",y="") +
       # caption=stringr::str_wrap("Plot shows the marginal effect at the sample mean of investment profit on a respondent choosing a profile interacted with the political connection treatment. Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
       #                           the intervals are 5% to 95% quantiles of the empirical posterior distribution.",
       #                           width=70)) +
  guides(fill=guide_legend(title="",label.position="bottom")) +
  geom_hline(yintercept=0.5,linetype=3)

profit_treat

ggsave("profit_spline.png")

tfp_treat + profit_treat + plot_layout(guides = "collect") +
  plot_annotation(tag_levels = "A",
                  caption=stringr::str_wrap("Plot shows the marginal effect at the sample mean of investment profile TFP (A) and investment profile profit (B) on a respondent choosing a profile. Estimates are shown conditional on a connected (treatment) profile versus a un-connected (control) profile. Estimates derived from Bayesian logistic regression model with a spline. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution.",
                                            width=90)) &
  theme(legend.position = "bottom")

ggsave("tfp_profit.png")

# connected X sector plot
# better to plot differences in conditional investment shares

con_sector <- conditional_effects(pol_con_sector)

bind_rows(con_sector$sector_1,
          con_sector$`connected:sector_1`) %>% 
   mutate(sector_1=fct_recode(sector_1,
                              "Restaurants" = "ACCOMMODATION AND FOOD SERVICE ACTIVITIES",
                              "Other" = "ACTIVITIES OF EXTRATERRITORIAL ORGANISATIONS AND BODIES",
                              "Domestic" = "ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS- AND SERVICES-PRODUCING ACTIVITIES OF HOUSEHOLDS FOR OWN USE",
                              "Admin" = "ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES",
                              "Agriculture" = "AGRICULTURE, FORESTRY AND FISHING",
                              "Arts" = "ARTS, ENTERTAINMENT AND RECREATION",
                              "Construction" = "CONSTRUCTION",
                              "Education" = "EDUCATION",
                              "Utilities" = "ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY",
                              "Finance" = "FINANCIAL AND INSURANCE ACTIVITIES",
                              "Health" = "HUMAN HEALTH AND SOCIAL WORK ACTIVITIES",
                              "ICT" = "INFORMATION AND COMMUNICATION",
                              "Manufacturing" = "MANUFACTURING",
                              "Mining" = "MINING AND QUARRYING",
                              "Other Service" = "OTHER SERVICE ACTIVITIES",
                              "Scientific" = "PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES",
                              "Public Administration" = "PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY",
                              "Real Estate" = "REAL ESTATE ACTIVITIES",
                              "Transportation" = "TRANSPORTATION AND STORAGE",
                              "Water" = "WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES",
                              "Commerce" = "WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES")) %>% 
  mutate(connected=str_to_sentence(connected)) %>% 
  ggplot(aes(y=estimate__,x=reorder(sector_1,estimate__))) +
  geom_pointrange(aes(ymin=lower__,ymax=upper__,colour=connected),
                  position=position_dodge(width=0.5)) +
  scale_colour_grey() +
  scale_fill_grey() +
  theme_tufte(base_family="") +
  guides(colour=guide_legend(title=""),
         fill=guide_legend(title="")) +
  labs(x="",y="Amount Invested",
       caption=stringr::str_wrap("Plot shows the marginal effect at the sample mean of the respondent's company's sector on the share invested in a profile. Estimates are shown conditional on a connected (treatment) profile versus a un-connected (control) profile. Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution.",
                                      width=90)) +
  scale_y_continuous(labels=scales::dollar_format(accuracy=1,scale = 100)) +
  coord_flip() +
  theme(legend.position = "top")

ggsave("sector_diff.png")

# texreg(list(base_con,country_con,pol_int_con,pol_con_con), 
#        file="int_con.tex",scalebox=0.8,float.pos="H",label="basecon",
#        custom.coef.map=list("connectedtreatment"="Connected",
#          "country_respUkraine"="Ukraine",
#                             "country_respVenezuelaBolivarianRepublicof..."="Venezuela",
#                             "connectedtreatment:country_respUkraine"="ConnectedXUkraine",
#                             "connectedtreatment:country_respVenezuelaBolivarianRepublicof..."="ConnectedXVenezuela",
#          "pol_eff_1"="Connections Efficacy",
#                             "connectedtreatment:pol_eff_1"="ConnectedXEfficacy",
#          "pol_con_1"="Prior Connections",
#                             "connectedtreatment:pol_con_1"="ConnectedXPrior"),
#        booktabs=T,dcolumn=T,threeparttable = T,use.packages = FALSE,
#        caption="ATEs for Connection Treatment and Interactions")

# texreg(list(gender,gender_country,gender_perf),booktabs=T,dcolumn=T,
#        file="gender.tex",threeparttable = T,use.packages = FALSE,float.pos="H",label="gender",
#        custom.coef.map=list("genderfemale"="Feminine",
#                             "genderneutral"="Non-Gendered",
#                             "gendermale"="Masculine",
#                             "genderfemale:country_respUkraine"="FeminineXUkraine",
#                             "genderfemale:country_respVenezuelaBolivarianRepublicof..."="FeminineXVenezuela",
#                             "genderneutral:country_respUkraine"="Non-GenderedXUkraine",
#                             "genderneutral:country_respVenezuelaBolivarianRepublicof..."="Non-GenderedXVenezuela",
#                             "gendermale:country_respUkraine"="MasculineXUkraine",
#                             "gendermale:country_respVenezuelaBolivarianRepublicof..."="MasculineXVenezuela",
#                             "moperf"="Performance",
#                             "moperf:genderfemale"="FeminineXPerformance",
#                             "moperf:genderneutral"="Non-GenderedXPerformance"),
#        scalebox=0.7,
#        caption="ATEs for Gendered Political Connections")

# texreg(list(con_profit_updown,
#             con_profit_lm,
#             con_profit_ord),booktabs=T,dcolumn=T,float.pos="H",label="profit",
#        custom.coef.map = list("connectedtreatment"="Connected",
#                               "total_perfup"="Positive Performance",
#                               "connectedtreatment:total_perfup"="ConnectedXPositive",
#                               "squared"="Squared Term",
#                               "linear"="Linear Term",
#                               "squared:linear"="LinearXSquared",
#                               "squared:total_perfup"="SquaredXPositive",
#                               "linear:total_perfup"="LinearXPositive",
#                               "squared:linear:total_perfup"="SquaredXLinearXPositive",
#                               "moperf"="Ordinal Performance",
#                               "moperf:connectedtreatment"="ConnectedXOrdinal Performance"),
#        file="profit.tex",threeparttable = T,use.packages = FALSE,
#        scalebox=0.7,
#        caption="ATEs for Company Performance and Political Connections")
# 
# texreg(con_exprop,booktabs=T,dcolumn=T,float.pos="H",
#        file="exprop.tex",threeparttable = T,use.packages = FALSE,label="exprop",
#        custom.coef.map = list("connectedtreatment"="Connected",
#                               "sum_exprop"="Informal Resolution",
#                               "connectedtreatment:sum_exprop"="ConnectedXInformal"),
#        caption="ATEs for Expropriation and Political Connections")

# plot of expropriation/informal means and connections

whatis <- conditional_effects(con_exprop)

whatis$`sum_exprop:connected` %>% 
  mutate(treatment=recode(`effect2__`,treatment="Treatment",
                          control="Control")) %>% 
  ggplot(aes(y=`estimate__`,x=sum_exprop )) +
  geom_ribbon(aes(ymin=`lower__`,
                  ymax=`upper__`,
                  fill=treatment),alpha=0.5) +
  geom_line(aes(linetype=treatment)) +
  theme_tufte(base_family="") +
  scale_fill_viridis_d() +
  theme(legend.position = "top") +
  xlab("Number of Times Informal Enforcement Used") +
  scale_y_continuous(labels=scales::percent) +
  ylab("Share of Money Invested") + 
  labs(caption=stringr::str_wrap("Plot shows the marginal effect at the sample mean of the respondent's company's use of informal enforcement on the share invested in a profile. Estimates are shown conditional on a connected (treatment) profile versus a un-connected (control) profile. Estimates derived from Bayesian logistic regression model. Point estimates are posterior medians and 
                                 the intervals are 5% to 95% quantiles of the empirical posterior distribution.",
                                 width=90)) +
  guides(fill=guide_legend(title=""),
         linetype=guide_legend(title=""))

ggsave("exprop_treatment.png")  
